library(tidyverse)
library(ggrastr)

# Restore the pre-computed plotting objects into the global environment.
# NOTE(review): `urls_df` and `domains` are used further down but are never
# defined in this script, so they are presumably restored by this call --
# confirm against the script that writes data/plotting_data.RData.
load("data/plotting_data.RData")

# Domains that make_bubble_plot() annotates with a text label; all other
# domains get an NA label.
# NOTE: prepare_bubble_plot_data() filters out "blogspot.com" before plotting,
# so that entry never actually produces a label.
LABELED_DOMAINS <- c(
    "mediaite.com",
    "wsj.com",
    "foxnews.com",
    "buzzfeednews.com",
    "nypost.com",
    "dailymail.co.uk",
    "hollywoodreporter.com",
    "realclearpolitics.com",
    "redstate.com",
    "politifact.com",
    "judicialwatch.org",
    "nytimes.com",
    "huffpost.com",
    "usatoday.com",
    "rollcall.com",
    "cbsnews.com",
    "reason.com",
    "axios.com",
    "inc.com",
    "washingtontimes.com",
    "telegraph.co.uk",
    "rt.com",
    "nationalreview.com",
    "wordpress.com",
    "blogspot.com",
    "cbslocal.com",
    "youtube.com",
    "dailykos.com",
    "breitbart.com"
)

# Collapse URL-level rows into one row per domain for the bubble plots.
#
# Args:
#   df:     URL-level data. The code reads the columns `domain`,
#           `sub_sig_left`, `sub_sig_right`, `inference` and `n_shares`.
#   thresh: Minimum number of URLs (rows of `df`) a domain needs to be kept.
#   sim:    Not used inside this function; kept so the signature mirrors
#           make_bubble_plot(). Both `m_bubble` and `m_bubble_sim` are always
#           computed.
#
# Returns (visibly) a tibble with one row per domain containing:
#   m_bubble      share of the domain's URLs with
#                 sub_sig_left + sub_sig_right > 0
#   m_bubble_sim  share of the domain's URLs whose `inference` equals "sub" or
#                 starts with "Substantively Significantly Left/Right"
#   total_shares  sum of `n_shares`
#   total_urls    number of URL rows
#   plus every column of the global `domains` table, joined on `domain`.
#
# Depends on the global `domains`, which is not defined in this file.
# "blogspot.com" and "instagram.com" are always excluded.
prepare_bubble_plot_data <- function(df, thresh, sim = FALSE) {
    # The pipeline is the function's last expression, so the result is
    # returned visibly instead of through a trailing assignment.
    as_tibble(df) |>
        group_by(domain) |>
        summarise(
            m_bubble = mean((sub_sig_left + sub_sig_right) > 0),
            m_bubble_sim = mean(
                (inference == "sub") |
                    str_detect(
                        as.character(inference),
                        "^Substantively Significantly (?:Left|Right)"
                    )
            ),
            total_shares = sum(n_shares),
            total_urls = n()
        ) |>
        left_join(domains, by = "domain") |>
        filter(!(domain %in% c("blogspot.com", "instagram.com"))) |>
        filter(total_urls >= thresh)
}

# Draw the domain-level bubble plot: domain score on the x axis, share of
# URLs flagged as distinct from their domain on the y axis, point size
# proportional to total shares.
#
# Args:
#   df:     URL-level data, passed on to prepare_bubble_plot_data().
#   thresh: Minimum URL count per domain, passed on to
#           prepare_bubble_plot_data().
#   sim:    When TRUE the y axis uses `m_bubble_sim`; otherwise `m_bubble`.
#
# Returns a ggplot object. Domains listed in LABELED_DOMAINS are annotated in
# red; the x axis is zoomed to [-1, 1].
make_bubble_plot <- function(df, thresh, sim = FALSE) {
    plot_df <- prepare_bubble_plot_data(df, thresh, sim)

    # Only the y variable differs between the two variants of the figure.
    base_mapping <- if (sim == TRUE) {
        aes(x = domain_score, y = m_bubble_sim)
    } else {
        aes(x = domain_score, y = m_bubble)
    }

    # The point layer is rasterised at 300 dpi.
    point_layer <- rasterize(
        geom_point(aes(size = total_shares), alpha = .25),
        dpi = 300
    )

    # Domains outside LABELED_DOMAINS get an NA label; the fixed seed makes
    # the label placement reproducible.
    label_layer <- ggrepel::geom_text_repel(
        aes(label = ifelse(domain %in% LABELED_DOMAINS, domain, NA)),
        col = "red",
        seed = 23423423,
        min.segment.length = 0,
        size = 4
    )

    trend_layer <- geom_smooth(se = FALSE, alpha = 0.5)

    # Layers are added in the same order as before: points, labels, smoother.
    ggplot(plot_df, base_mapping) +
        point_layer +
        label_layer +
        trend_layer +
        scale_y_continuous(labels = scales::percent) +
        scale_size_continuous(name = "Total Shares") +
        coord_cartesian(xlim = c(-1, 1)) +
        labs(
            x = "Domain Score",
            y = "Proportion URLs Substantively Distinct from Domain\n(99% Interval)"
        ) +
        theme_classic() +
        theme(
            text = element_text(family = "serif", size = 12),
            plot.title = element_text(size = 16, face = "bold"),
            legend.position = "right",
            legend.direction = "vertical",
            panel.grid.major.y = element_line(color = "grey90"),
            panel.grid.minor.y = element_line(color = "grey96")
        )
}



# Bubble plots restricted to domains whose URL count reaches the 75th
# percentile of `domains$n_urls`. Each figure is written twice: once as
# fig_*.pdf and once under a descriptive tw_* name.
thresh_q75 <- quantile(domains$n_urls, 0.75)

# `filename` and `plot` are spelled out: the previous `file =` only worked
# through partial matching against ggsave()'s `filename` argument.
bubble_domains <- make_bubble_plot(urls_df, thresh_q75)
ggsave(filename = "results/fig_7.pdf", plot = bubble_domains, width = 10, height = 8)
ggsave(filename = "results/tw_bubble_domains.pdf", plot = bubble_domains, width = 10, height = 8)

# Same figure with the y axis based on `m_bubble_sim` (sim = TRUE).
bubble_domains <- make_bubble_plot(urls_df, thresh_q75, sim = TRUE)
ggsave(filename = "results/fig_f2.pdf", plot = bubble_domains, width = 10, height = 8)
ggsave(filename = "results/tw_bubble_domains_sim.pdf", plot = bubble_domains, width = 10, height = 8)

# not included in manuscript due to appendix page limits
# bubble_domains_no_thresh <- make_bubble_plot(urls_df, quantile(domains$n_urls, 0)) 
# ggsave(bubble_domains_no_thresh, file = "results/tw_bubble_domains_no_thresh.pdf", width = 10, height = 8)

# bubble_domains_median_thresh <- make_bubble_plot(urls_df, quantile(domains$n_urls, 0.5)) 
# ggsave(bubble_domains_median_thresh, file = "results/tw_bubble_domains_median_thresh.png", width = 10, height = 8)

# bubble_domains_high_thresh <- make_bubble_plot(urls_df, quantile(domains$n_urls, 0.9)) 
# ggsave(bubble_domains_high_thresh, file = "results/tw_bubble_domains_high_thresh.png", width = 10, height = 8)

# Domain-level linear model written to results/table_h1a.tex.
# The domain summary uses quantile 0 of `domains$n_urls` (its minimum) as the
# URL-count threshold.
domain_df <- read_tsv("data/raw/domain_reference_table.tsv")
bubble_domain_df <- prepare_bubble_plot_data(urls_df, quantile(domains$n_urls, 0))

# The join key is stated explicitly so the join cannot silently pick up extra
# key columns that the two tables happen to share.
# NOTE(review): assumes `domain` is the only column common to both sides
# (i.e. `domains` has no `pct_political` / `num_shares`) -- confirm.
mdf <- bubble_domain_df |>
    rename(pct_in_bubble = m_bubble) |>
    mutate(domain_extremity = abs(domain_score)) |>
    inner_join(
        domain_df |> select(domain, pct_political, num_shares),
        by = "domain"
    )

# `n_urls` is not produced by the summary step, so it comes from the `domains`
# table joined inside prepare_bubble_plot_data().
bubble_model <- lm(
    pct_in_bubble ~ domain_extremity + n_urls + I(log10(num_shares)),
    data = mdf
)
modelsummary::modelsummary(bubble_model, output = "results/table_h1a.tex", stars = TRUE, fmt = 3)
